#!/usr/bin/python3

#
# HFCL - HybridOS Foundation Class Library
#
# Copyright (C) 2019 Beijing FMSoft Technologies Co., Ltd.
#
# This file is part of HFCL.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#

"""
Make HTML entities table:
    1. Read 'data/entities.json' file.
    2. Generate html entities table.
"""

import os, sys
import time
import re
import json

TOOL_NAME="make_html_entities_table.py"
SRC_FILE="data/entities.json"
DST_FILE="htmlentitiestable.inc"

def write_header(fout):
    """Emit the "auto-generated" banner at the top of the output file.

    Args:
        fout: Writable text stream receiving the generated source. The
            banner names this tool and is followed by one blank line.
    """
    banner = (
        "// This file is auto-generated by using '%s'.\n" % (TOOL_NAME, ),
        "// Please take care when you modify this file mannually.\n",
        "\n",
    )
    for line in banner:
        fout.write(line)

def write_html_entities(fout, entities):
    """Write the C table of HTML entities plus the token-length macro.

    The output consists of a ``static struct _HtmlEntity`` array definition
    named ``_html_entities`` with one initializer per entity (sorted by
    entity name), followed by ``#define MAX_ENTITY_TOKEN_LEN``.

    Args:
        fout: Writable text stream receiving the generated C source.
        entities: Mapping from entity name to a dict that provides
            'characters' (the replacement text, echoed in a trailing
            comment) and 'codepoints' (an iterable of integers).

    Raises:
        KeyError: If an entry lacks 'characters' or 'codepoints'.
    """
    fout.write("static struct _HtmlEntity {\n")
    fout.write("    const char* token;\n")
    fout.write("    Uchar32     ucs[2];\n")
    fout.write("} _html_entities [] = {\n")

    max_token_len = 0
    for name in sorted(entities):
        entry = entities[name]

        # Only the leading '&' is dropped; a trailing ';' (when present)
        # stays part of the token, and names lacking one are emitted too.
        token = name.lstrip('&')
        max_token_len = max(max_token_len, len(token))

        characters = entry['characters']
        # NOTE(review): the struct declares ucs[2]; entries with more than
        # two code points would not fit -- confirm the input never has any.
        codepoints = "".join("0x%06X, " % (cp, ) for cp in entry['codepoints'])

        # A raw newline would split the trailing comment across lines, so
        # show it as the two-character escape instead.
        if characters == '\n':
            characters = '\\n'

        fout.write("    { \"%s\", { %s} }, /* %s */\n"
                   % (token, codepoints, characters))

    fout.write("};\n")
    fout.write("\n")

    fout.write("#define MAX_ENTITY_TOKEN_LEN %d\n" % max_token_len)
    fout.write("\n")

if __name__ == "__main__":
    # Script entry point: read SRC_FILE (JSON) and generate DST_FILE.
    # Exit status: 0 on success, 1 if the input cannot be opened, 2 if the
    # output cannot be opened, 3 if writing the header fails.
    #
    # Both files are opened explicitly as UTF-8 so the result does not depend
    # on the locale; the generated comments contain non-ASCII characters.
    try:
        fsrc = open(SRC_FILE, "r", encoding="utf-8")
    except OSError:
        print("%s: failed to open input file %s" % (TOOL_NAME, SRC_FILE, ))
        sys.exit(1)

    # The 'with' blocks guarantee both files are closed (and the output
    # flushed) on every path, including the sys.exit() calls below.
    with fsrc:
        try:
            fdst = open(DST_FILE, "w", encoding="utf-8")
        except OSError:
            print("%s: failed to open output file %s" % (TOOL_NAME, DST_FILE, ))
            sys.exit(2)

        with fdst:
            print("Writting header to dst file %s..." % DST_FILE)
            try:
                write_header(fdst)
            except OSError:
                print("FAILED")
                sys.exit(3)
            print("DONE")

            print("Loading and writting entities to dst file %s..." % DST_FILE)
            entities = json.load(fsrc)
            write_html_entities(fdst, entities)
            print("DONE")

    sys.exit(0)

